package com.gglu.www.task.processor;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.baomidou.mybatisplus.mapper.EntityWrapper;
import com.gglu.www.dto.ResultDto;
import com.gglu.www.model.webmagic.Article;
import com.gglu.www.service.webmagic.ArticleService;
import com.gglu.www.util.DateUtil;
import com.gglu.www.util.HttpUtils;
import com.gglu.www.util.SpringContextUtil;
import com.google.common.base.Strings;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.processor.PageProcessor;

import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * 杭州新苗网络科技有限公司
 *
 * @author meihf
 * @create 2018/1/28
 * @description
 */
@Component
public class PageHtmlForXiaoHongshu implements PageProcessor{

    private static final String WOSHIPM = "http://www.woshipm.com/";

    private Logger logger = Logger.getLogger(PageHtmlForXiaoHongshu.class);

    private Site site = Site.me().setDomain(WOSHIPM);

    @Autowired
    ArticleService articleService;

    private static List<String> urlLinks = new ArrayList<>();

    private static List<Article> articleList = new ArrayList<>();

    //根据请求获取url列表
//    http://www.woshipm.com/__api/v1/stream-list?paged=2&action=laodpost


    public List getUrlLinksByPage(String page ) {
        List<String> urls = new ArrayList<>();
        String[] pages = page.split(",");
        for (String p:pages){
            ResultDto resultDto = HttpUtils.doGet("http://www.woshipm.com/__api/v1/stream-list?paged="+p+"&action=laodpost",null);
            JSONObject data = JSON.parseObject(resultDto.getData().toString());
            JSONArray dataArr = data.getJSONArray("payload");
            String id = "";
            String catslug = "";
            String url = "";
            for (Object dataEntity:dataArr) {
                id = ((JSONObject)dataEntity).getString("id");
                catslug  = ((JSONObject) dataEntity).getString("catslug");
                url = WOSHIPM+catslug+"/"+id+".html";
                urls.add(url);
            }
        }
       return urls;
    }

//    @Scheduled(cron = "0/1 * * * * ?")
//    @PostConstruct
    @Scheduled(fixedDelay = 43200000)
    public void saveContentForChanPinJingLi(){
        logger.info("开始抓取产品经理前两页信息");
        Spider.create(new PageHtmlForXiaoHongshu()).addUrl(WOSHIPM)
        .addPipeline(new ConsolePipeline()).thread(1).run();
        //这里不能用多线程否则插入数据库中会出现多条重复记录

    }

    public static void main(String args[]) {
        Spider.create(new PageHtmlForXiaoHongshu()).addUrl(WOSHIPM)
                .addPipeline(new ConsolePipeline()).thread(1).run();
        //这里不能用多线程否则插入数据库中会出现多条重复记录
    }

    @Override
    public void process(Page page) {
        if (!page.getUrl().toString().contains("html")){
            List<String> links = getUrlLinksByPage("1,2");
            page.addTargetRequests(links);
        }else {
            articleService = (ArticleService) SpringContextUtil.getBean("articleService");
            //获取每一篇文章的内容
            //top
            List<String> topData = page.getHtml().xpath("//span[@class='post-meta-item']/text()").all();
            String writeDate = topData.get(0);
            String scan = topData.get(1);
            String star = topData.get(2);
            String praise = topData.get(3);

            String url = page.getUrl().toString();
            String title = page.getHtml().xpath("//h2[@class='article-title']/text()").toString();

            //content
            String content = page.getHtml().xpath("//div[@class='grap']/tidyText()").toString();
            Article article = new Article();
            article.setWriteDate(DateUtil.format2Date(writeDate,DateUtil.YYYY_MM_DD));
            article.setUrl(url);
            article.setScan(scan);
            article.setStar(star);
            article.setPraise(praise);
            article.setTitle(title);
            article.setContent(content);
            article.setCreateDate(new Date());
            if (!Strings.isNullOrEmpty(title) && !Strings.isNullOrEmpty(url)){
                EntityWrapper wrapper = new EntityWrapper();
                wrapper.where(" title={0} and url = {1} ",title,url);
                int urlCount = articleService.selectCount(wrapper);
                if (urlCount == 0){
                    articleService.insert(article);
                }
            }
        }
    }

    @Override
    public Site getSite() {
        return site;
    }
}
